Clean up and fix VCPU hotplug and SMP save/restore.
author: kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Tue, 28 Mar 2006 17:43:30 +0000 (18:43 +0100)
committer: kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Tue, 28 Mar 2006 17:43:30 +0000 (18:43 +0100)
 1. No longer hold xenbus_lock while taking down VCPUs
    in SMP suspend path. This allows block device hotplug
    to continue working and so we will not deadlock on
    paging in userspace hotplug code.
 2. Track both the xenbus-permitted and the local-admin-permitted
    cpumasks for VCPUs to bring online. Thus, if a local admin
    takes a CPU down, that decision will not be unexpectedly
    overridden the next time the kernel interrogates xenstore.

Signed-off-by: Keir Fraser <keir@xensource.com>
linux-2.6-xen-sparse/drivers/xen/core/reboot.c
linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
linux-2.6-xen-sparse/include/xen/gnttab.h
linux-2.6-xen-sparse/include/xen/xenbus.h

index e63e40465dbb0df5abcf8f86a466bf703091b4d0..f0f1d803a56abb4b33c3558271f974676e05b1ab 100644 (file)
@@ -15,6 +15,7 @@
 #include <xen/xenbus.h>
 #include <linux/cpu.h>
 #include <linux/kthread.h>
+#include <xen/gnttab.h>
 #include <xen/xencons.h>
 
 #if defined(__i386__) || defined(__x86_64__)
@@ -76,31 +77,24 @@ static int shutting_down = SHUTDOWN_INVALID;
 static void __shutdown_handler(void *unused);
 static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
 
-#ifndef CONFIG_HOTPLUG_CPU
-#define cpu_down(x) (-EOPNOTSUPP)
-#define cpu_up(x) (-EOPNOTSUPP)
+#ifdef CONFIG_SMP
+int  smp_suspend(void);
+void smp_resume(void);
+#else
+#define smp_suspend()  (0)
+#define smp_resume()   ((void)0)
 #endif
 
-
 static int __do_suspend(void *ignore)
 {
-       int i, j, k, fpp;
+       int i, j, k, fpp, err;
 
        extern unsigned long max_pfn;
        extern unsigned long *pfn_to_mfn_frame_list_list;
        extern unsigned long *pfn_to_mfn_frame_list[];
 
-       extern int gnttab_suspend(void);
-       extern int gnttab_resume(void);
        extern void time_resume(void);
 
-#ifdef CONFIG_SMP
-       cpumask_t prev_online_cpus;
-       int vcpu_prepare(int vcpu);
-#endif
-
-       int err = 0;
-
        BUG_ON(smp_processor_id() != 0);
        BUG_ON(in_interrupt());
 
@@ -110,40 +104,12 @@ static int __do_suspend(void *ignore)
                return -EOPNOTSUPP;
        }
 
-#if defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU)
-       if (num_online_cpus() > 1) {
-               printk(KERN_WARNING "Can't suspend SMP guests "
-                      "without CONFIG_HOTPLUG_CPU\n");
-               return -EOPNOTSUPP;
-       }
-#endif
+       err = smp_suspend();
+       if (err)
+               return err;
 
        xenbus_suspend();
 
-       lock_cpu_hotplug();
-#ifdef CONFIG_SMP
-       /*
-        * Take all other CPUs offline. We hold the hotplug mutex to
-        * avoid other processes bringing up CPUs under our feet.
-        */
-       cpus_clear(prev_online_cpus);
-       while (num_online_cpus() > 1) {
-               for_each_online_cpu(i) {
-                       if (i == 0)
-                               continue;
-                       unlock_cpu_hotplug();
-                       err = cpu_down(i);
-                       lock_cpu_hotplug();
-                       if (err != 0) {
-                               printk(KERN_CRIT "Failed to take all CPUs "
-                                      "down: %d.\n", err);
-                               goto out_reenable_cpus;
-                       }
-                       cpu_set(i, prev_online_cpus);
-               }
-       }
-#endif
-
        preempt_disable();
 
 #ifdef __i386__
@@ -153,7 +119,6 @@ static int __do_suspend(void *ignore)
 
        __cli();
        preempt_enable();
-       unlock_cpu_hotplug();
 
        gnttab_suspend();
 
@@ -203,30 +168,9 @@ static int __do_suspend(void *ignore)
 
        xencons_resume();
 
-#ifdef CONFIG_SMP
-       for_each_cpu(i)
-               vcpu_prepare(i);
-
-#endif
-
-       /*
-        * Only resume xenbus /after/ we've prepared our VCPUs; otherwise
-        * the VCPU hotplug callback can race with our vcpu_prepare
-        */
        xenbus_resume();
 
-#ifdef CONFIG_SMP
- out_reenable_cpus:
-       for_each_cpu_mask(i, prev_online_cpus) {
-               j = cpu_up(i);
-               if ((j != 0) && !cpu_online(i)) {
-                       printk(KERN_CRIT "Failed to bring cpu "
-                              "%d back up (%d).\n",
-                              i, j);
-                       err = j;
-               }
-       }
-#endif
+       smp_resume();
 
        return err;
 }
@@ -334,7 +278,6 @@ static void shutdown_handler(struct xenbus_watch *watch,
        kfree(str);
 }
 
-#ifdef CONFIG_MAGIC_SYSRQ
 static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
                          unsigned int len)
 {
@@ -360,45 +303,35 @@ static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
        if (err == -EAGAIN)
                goto again;
 
-       if (sysrq_key != '\0') {
+#ifdef CONFIG_MAGIC_SYSRQ
+       if (sysrq_key != '\0')
                handle_sysrq(sysrq_key, NULL, NULL);
-       }
-}
 #endif
+}
 
 static struct xenbus_watch shutdown_watch = {
        .node = "control/shutdown",
        .callback = shutdown_handler
 };
 
-#ifdef CONFIG_MAGIC_SYSRQ
 static struct xenbus_watch sysrq_watch = {
        .node ="control/sysrq",
        .callback = sysrq_handler
 };
-#endif
 
 static int setup_shutdown_watcher(struct notifier_block *notifier,
                                   unsigned long event,
                                   void *data)
 {
-       int err1 = 0;
-#ifdef CONFIG_MAGIC_SYSRQ
-       int err2 = 0;
-#endif
-
-       err1 = register_xenbus_watch(&shutdown_watch);
-#ifdef CONFIG_MAGIC_SYSRQ
-       err2 = register_xenbus_watch(&sysrq_watch);
-#endif
+       int err;
 
-       if (err1)
+       err = register_xenbus_watch(&shutdown_watch);
+       if (err)
                printk(KERN_ERR "Failed to set shutdown watcher\n");
 
-#ifdef CONFIG_MAGIC_SYSRQ
-       if (err2)
+       err = register_xenbus_watch(&sysrq_watch);
+       if (err)
                printk(KERN_ERR "Failed to set sysrq watcher\n");
-#endif
 
        return NOTIFY_DONE;
 }
index fb0b822b5cbb0be9fd4b0e658601114f721ff03a..1f588c59eec26524d13f016ee656cdd22f27e2f6 100644 (file)
@@ -79,6 +79,15 @@ EXPORT_SYMBOL(x86_cpu_to_apicid);
 unsigned int maxcpus = NR_CPUS;
 #endif
 
+/*
+ * Set of CPUs that remote admin software will allow us to bring online.
+ * Notified to us via xenbus.
+ */
+static cpumask_t xenbus_allowed_cpumask;
+
+/* Set of CPUs that local admin will allow us to bring online. */
+static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
+
 void __init prefill_possible_map(void)
 {
        int i, rc;
@@ -146,7 +155,7 @@ static void cpu_bringup(void)
        cpu_idle();
 }
 
-void vcpu_prepare(int vcpu)
+static void vcpu_prepare(int vcpu)
 {
        vcpu_guest_context_t ctxt;
        struct task_struct *idle = idle_task(vcpu);
@@ -278,6 +287,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
                vcpu_prepare(cpu);
        }
 
+       xenbus_allowed_cpumask = cpu_present_map;
+
        /* Currently, Xen gives no dynamic NUMA/HT info. */
        for (cpu = 1; cpu < NR_CPUS; cpu++) {
                cpu_sibling_map[cpu] = cpumask_of_cpu(cpu);
@@ -301,6 +312,15 @@ void __devinit smp_prepare_boot_cpu(void)
        cpu_online_map   = cpumask_of_cpu(0);
 }
 
+static int local_cpu_hotplug_request(void)
+{
+       /*
+        * We assume a CPU hotplug request comes from local admin if it is made
+        * via a userspace process (i.e., one with a real mm_struct).
+        */
+       return (current->mm != NULL);
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 
 /*
@@ -331,8 +351,10 @@ static void vcpu_hotplug(unsigned int cpu)
        }
 
        if (strcmp(state, "online") == 0) {
+               cpu_set(cpu, xenbus_allowed_cpumask);
                (void)cpu_up(cpu);
        } else if (strcmp(state, "offline") == 0) {
+               cpu_clear(cpu, xenbus_allowed_cpumask);
                (void)cpu_down(cpu);
        } else {
                printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
@@ -353,6 +375,22 @@ static void handle_vcpu_hotplug_event(
        }
 }
 
+static int smpboot_cpu_notify(struct notifier_block *notifier,
+                             unsigned long action, void *hcpu)
+{
+       int cpu = (long)hcpu;
+
+       /*
+        * We do this in a callback notifier rather than __cpu_disable()
+        * because local_cpu_hotplug_request() does not work in the latter
+        * as it's always executed from within a stopmachine kthread.
+        */
+       if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
+               cpu_clear(cpu, local_allowed_cpumask);
+
+       return NOTIFY_OK;
+}
+
 static int setup_cpu_watcher(struct notifier_block *notifier,
                              unsigned long event, void *data)
 {
@@ -360,7 +398,8 @@ static int setup_cpu_watcher(struct notifier_block *notifier,
 
        static struct xenbus_watch cpu_watch = {
                .node = "cpu",
-               .callback = handle_vcpu_hotplug_event };
+               .callback = handle_vcpu_hotplug_event,
+               .flags = XBWF_new_thread };
        (void)register_xenbus_watch(&cpu_watch);
 
        if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
@@ -375,14 +414,62 @@ static int setup_cpu_watcher(struct notifier_block *notifier,
 
 static int __init setup_vcpu_hotplug_event(void)
 {
+       static struct notifier_block hotplug_cpu = {
+               .notifier_call = smpboot_cpu_notify };
        static struct notifier_block xsn_cpu = {
                .notifier_call = setup_cpu_watcher };
+
+       register_cpu_notifier(&hotplug_cpu);
        register_xenstore_notifier(&xsn_cpu);
+
        return 0;
 }
 
 arch_initcall(setup_vcpu_hotplug_event);
 
+int smp_suspend(void)
+{
+       int i, err;
+
+       lock_cpu_hotplug();
+
+       /*
+        * Take all other CPUs offline. We hold the hotplug mutex to
+        * avoid other processes bringing up CPUs under our feet.
+        */
+       while (num_online_cpus() > 1) {
+               unlock_cpu_hotplug();
+               for_each_online_cpu(i) {
+                       if (i == 0)
+                               continue;
+                       err = cpu_down(i);
+                       if (err) {
+                               printk(KERN_CRIT "Failed to take all CPUs "
+                                      "down: %d.\n", err);
+                               for_each_cpu(i)
+                                       vcpu_hotplug(i);
+                               return err;
+                       }
+               }
+               lock_cpu_hotplug();
+       }
+
+       return 0;
+}
+
+void smp_resume(void)
+{
+       int i;
+
+       for_each_cpu(i)
+               vcpu_prepare(i);
+
+       unlock_cpu_hotplug();
+
+       for_each_cpu(i)
+               vcpu_hotplug(i);
+}
+
 int __cpu_disable(void)
 {
        cpumask_t map = cpu_online_map;
@@ -415,6 +502,20 @@ void __cpu_die(unsigned int cpu)
 
 #else /* !CONFIG_HOTPLUG_CPU */
 
+int smp_suspend(void)
+{
+       if (num_online_cpus() > 1) {
+               printk(KERN_WARNING "Can't suspend SMP guests "
+                      "without CONFIG_HOTPLUG_CPU\n");
+               return -EOPNOTSUPP;
+       }
+       return 0;
+}
+
+void smp_resume(void)
+{
+}
+
 int __cpu_disable(void)
 {
        return -ENOSYS;
@@ -429,6 +530,20 @@ void __cpu_die(unsigned int cpu)
 
 int __devinit __cpu_up(unsigned int cpu)
 {
+       int rc;
+
+       if (local_cpu_hotplug_request()) {
+               cpu_set(cpu, local_allowed_cpumask);
+               if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
+                       printk("%s: attempt to bring up CPU %u disallowed by "
+                              "remote admin.\n", __FUNCTION__, cpu);
+                       return -EBUSY;
+               }
+       } else if (!cpu_isset(cpu, local_allowed_cpumask) ||
+                  !cpu_isset(cpu, xenbus_allowed_cpumask)) {
+               return -EBUSY;
+       }
+
 #ifdef CONFIG_SMP_ALTERNATIVES
        if (num_online_cpus() == 1)
                prepare_for_smp();
@@ -436,7 +551,9 @@ int __devinit __cpu_up(unsigned int cpu)
 
        xen_smp_intr_init(cpu);
        cpu_set(cpu, cpu_online_map);
-       if (HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL) != 0)
+
+       rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
+       if (rc != 0)
                BUG();
 
        return 0;
index f338de1f375a147e8e330bf41e36bbe78a9584fd..c64d2500671210de7ef5bc2b9740b25b2f2288e3 100644 (file)
@@ -16,7 +16,7 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
 {
        struct pciback_device *pdev;
 
-       pdev = kmalloc(sizeof(struct pciback_device), GFP_KERNEL);
+       pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL);
        if (pdev == NULL)
                goto out;
        dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
index 7e343eff212e9456794e0ddfb54cdf66e633b56a..2cb2e97a879dbf5e8f5817376f870f4bb7acefa7 100644 (file)
@@ -685,6 +685,24 @@ void xs_resume(void)
        up_write(&xs_state.suspend_mutex);
 }
 
+static int xenwatch_handle_callback(void *data)
+{
+       struct xs_stored_msg *msg = data;
+
+       msg->u.watch.handle->callback(msg->u.watch.handle,
+                                     (const char **)msg->u.watch.vec,
+                                     msg->u.watch.vec_size);
+
+       kfree(msg->u.watch.vec);
+       kfree(msg);
+
+       /* Kill this kthread if we were spawned just for this callback. */
+       if (current->pid != xenwatch_pid)
+               do_exit(0);
+
+       return 0;
+}
+
 static int xenwatch_thread(void *unused)
 {
        struct list_head *ent;
@@ -707,12 +725,11 @@ static int xenwatch_thread(void *unused)
 
                if (ent != &watch_events) {
                        msg = list_entry(ent, struct xs_stored_msg, list);
-                       msg->u.watch.handle->callback(
-                               msg->u.watch.handle,
-                               (const char **)msg->u.watch.vec,
-                               msg->u.watch.vec_size);
-                       kfree(msg->u.watch.vec);
-                       kfree(msg);
+                       if (msg->u.watch.handle->flags & XBWF_new_thread)
+                               kthread_run(xenwatch_handle_callback,
+                                           msg, "xenwatch_cb");
+                       else
+                               xenwatch_handle_callback(msg);
                }
 
                mutex_unlock(&xenwatch_mutex);
index 584a5db3556224dffddf8b48507607e36569b8be..e9fd1b63ae81b96c5a17964c6322ff5f0392ffd5 100644 (file)
@@ -110,6 +110,9 @@ void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
 #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
 #endif
 
+int gnttab_suspend(void);
+int gnttab_resume(void);
+
 #endif /* __ASM_GNTTAB_H__ */
 
 /*
index 122480584ff9de98723921ca92514bf273a09277..6d69963870b52c87fdb186ea26e33267f6c18ea8 100644 (file)
@@ -55,8 +55,17 @@ struct xenbus_watch
        /* Callback (executed in a process context with no locks held). */
        void (*callback)(struct xenbus_watch *,
                         const char **vec, unsigned int len);
+
+       /* See XBWF_ definitions below. */
+       unsigned long flags;
 };
 
+/*
+ * Execute callback in its own kthread. Useful if the callback is long
+ * running or heavily serialised, to avoid taking out the main xenwatch thread
+ * for a long period of time (or even unwittingly causing a deadlock).
+ */
+#define XBWF_new_thread        1
 
 /* A xenbus device. */
 struct xenbus_device {